/******************************************************************************
Input(s): CMF_ASM_1977_2001.dta, CMF_ASM_2002_2009.dta, priceindex.dta

This .do file has 3 sections. Each section runs only when its switch
(local S1, S2 or S3) is set to 1.
SECTION 1:
	- Inputs Stata datasets that were created by SAS and combines them
SECTION 2:
	- Deletes administrative records and other questionable/imputed observations
	- Cleans Industry Codes
	- Constructs key variables
	- Saves new data file
SECTION 3:
	- Creates industry-year value added totals and saves them

******************************************************************************/

/*Start from an empty session: no programs, data, or stored results*/
clear programs
clear all
/*Do not pause output with -more- prompts*/
set more off
set matsize 11000

/*Directory roots. File names are appended directly to these strings,
so each one must end with a trailing slash.*/
local source "/FILE PATH GOES HERE/"
local inter "/FILE PATH GOES HERE/"
local tables "/FILE PATH GOES HERE/"
local conc "/FILE PATH GOES HERE/"
local data "/FILE PATH GOES HERE/"



/*Section switches: set a switch to 1 to run that section, 0 to skip it*/
/*SECTION 1: Creating Master Data Set*/
local S1 0
/*SECTION 2: Cleaning Industry Codes and Constructing Key Variables*/
local S2 0
/*SECTION 3: Creating Industry VA Shares*/
local S3 0


/*************************************************************
SECTION 1: Creating Master Data Set
**************************************************************/
if `S1' == 1 {
/*Decompress the two SAS-exported extracts. The shell commands use the same
source directory local as the -use- commands below, so the path is set in
one place only; quoting keeps paths that contain spaces intact.*/
!gunzip "`source'CMF_ASM_1977_2001.dta.gz"
!gunzip "`source'CMF_ASM_2002_2009.dta.gz"

/*Stage the 2002-2009 extract in a tempfile. sp is converted to string
before appending (presumably to match its storage type in the 1977-2001
extract; confirm against the source data).*/
use "`source'CMF_ASM_2002_2009.dta", clear
tostring sp, replace
	tempfile newer
	save "`newer'"

/*Load the 1977-2001 extract, convert lfo, hind and cc to string, and stack
the newer years underneath. -clear- makes the load independent of whatever
is still in memory.*/
use "`source'CMF_ASM_1977_2001.dta", clear
tostring lfo hind cc, replace
append using "`newer'"

/*Re-compress the source extracts now that they are in memory*/
!gzip "`source'CMF_ASM_1977_2001.dta"
!gzip "`source'CMF_ASM_2002_2009.dta"

/*Save the combined 1977-2009 file to the data directory and compress it;
later sections decompress it again before loading.*/
compress
save "`data'CMF_ASM_1977_2009.dta", replace
!gzip "`data'CMF_ASM_1977_2009.dta"
}



/*************************************************************
SECTION 2: Cleaning Industry Codes and Constructing Key Variables
**************************************************************/
if `S2' == 1 {
/*Load the combined 1977-2009 file from the data directory and re-compress
the copy on disk once it is in memory.*/
!gunzip "`data'CMF_ASM_1977_2009.dta.gz"
use "`data'CMF_ASM_1977_2009.dta", clear
!gzip "`data'CMF_ASM_1977_2009.dta"

/*Dropped Administrative Records*/
drop if ar == 1

/*Drop observations deemed unfit for publication*/
drop if tabbed == "N"

/*4- and 6-digit industry codes taken from the NAICS 2002 code; observations
with an empty code are dropped.*/
gen naics4 = substr(fk_naics02,1,4)
drop if naics4 == ""
gen naics6 = substr(fk_naics02,1,6)
drop if naics6 == ""

/**********************************************************
Code referencing industry codes deleted at Census request
***********************************************************/

/*Drop if industry is not considered manufacturing in 2002*/
gen manuf = substr(fk_naics02,1,1)
drop if manuf!="3"

/*Cleaning Industry Codes*/
/*Part I (documentation only -- the listing under "Code:" is NOT executed;
it records how the flagged industries were identified):
	We focus on the ASM subsample and construct measures of establishment 
	count. We then identify industries that do not have positive 
	establishment counts in all years (33 years max). We then look at the 
	NAICS 2002 code book; for each industry in question, we look to see if 
	this industry's establishments are cross-referenced in another NAICS6 
	industry that shares that same NAICS5 root.
	1) If there are NAICS6 industries in the cross-reference list that are
	also picked up by our filter, we combine the NAICS6 industries into a 
	new industry and re-run the filter. If the newly-combined industry 
	passes the filter, we keep it.
	2) If there are NAICS6 industries in the cross-reference list, we 
	combine the NAICS6 industries into a new industry and re-run the filter.
	If the newly-combined industry passes the filter, we keep it.
	3) If 1) and 2) don't satisfy the filter, or there are no 
	cross-referenced industries, we then drop the NAICS6 in question
	
	Code:
	keep if et==0
	// Industry-year characteristics
	// Number of establishments
	gen temp = 1
	bys year fk_naics02: egen est_count=sum(temp)
	drop temp
	// Employment Count
	gen temp = te
	bys year fk_naics02: egen emp=sum(temp)
	drop temp
	// Number of years with at least 1 establishment
	gen temp = year
	bys fk_naics02 temp: gen temp2 = 1 if _n==1
		replace temp2 = . if temp==.
	bys fk_naics02: egen year_count = sum(temp2)
	drop temp temp2
	
	egen tyf = tag(year fk_naics02)
	egen tf = tag(fk_naics02)
	
	order fk_naics02
*/

/**********************************************************
Code referencing industry codes deleted at Census request
***********************************************************/

/*NOTE: flag1_update is not created anywhere in this file; it was presumably
generated by the redacted code above. That code must be restored before this
section can run -- otherwise Stata stops here with "variable not found".*/
	drop if flag1_update==1
	drop flag1_update
	
/*Part II (documentation only -- the listing under "Code:" is NOT executed;
running it would collapse the establishment-level data):
	We focus on the ASM subsample and construct measures of establishment 
	count and total employment by industry-year. We then calculate mid-point
	growth rates and identify industry-years matching the following three 
	criteria:
	1) mid-point growth rates of either establishment count or total 
	employment that are greater than 1 or less than -1
	2) moving-average establishment count of at least 25 [window(2 1 2)]
	3) year is 1987, 1988, 1997, 1998
	
	For each industry flagged by the above filter, we then look at the
	NAICS 2002 code book. For each industry in question, we look to see if 
	this industry's establishments are cross-referenced in another NAICS6 
	industry that shares that same NAICS5 root.
	1) If there are NAICS6 industries in the cross-reference list that are
	also picked up by our filter, we combine the NAICS6 industries into a 
	new industry and re-run the filter. If the newly-combined industry 
	passes the filter, we keep it.
	2) If there are NAICS6 industries in the cross-reference list, we 
	combine the NAICS6 industries into a new industry and re-run the filter.
	If the newly-combined industry passes the filter, we keep it.
	3) If 1) and 2) don't satisfy the filter, or there are no 
	cross-referenced industries, we then drop the NAICS6 in question
	
	Code:
	keep if et==0
	// Industry-year characteristics
	// Number of establishments
	gen temp = 1
	bys year fk_naics02: egen est_count=sum(temp)
	drop temp
	// Employment Count
	gen temp = te
	bys year fk_naics02: egen emp=sum(temp)
	drop temp
	
	collapse (mean) est_count emp, by(year fk_naics02)
	sort fk_naics02 year

	encode fk_naics02, gen(naics6)
	tsset naics6 year
		
	gen est_dhs = (est_count-l.est_count)/(0.5*est_count+0.5*l.est_count)
	gen emp_dhs = (emp-l.emp)/(0.5*emp+0.5*l.emp)
	tssmooth ma est_ma = est_count, window(2 1 2)
		
	gen flag1 = 1 if (est_dhs<-1 | est_dhs>1 | emp_dhs<-1 | emp_dhs>1) & est_dhs!=. & emp_dhs!=.
	gen flag2 = 1 if flag1==1 & est_ma>25
	gen flag3 = 1 if flag2==1 & (year==1987 | year==1988 | year == 1997 | year==1998)
	br if flag3==1
*/

/**********************************************************
Code referencing industry codes deleted at Census request
***********************************************************/

/*NOTE: flag2_update, like flag1_update, is not created anywhere in this
file; it was presumably generated by the redacted code above, which must be
restored before this section can run.*/
	drop if flag2_update==1
	drop flag2_update

/*Dropping Observations without LBD numbers*/	
drop if lbdnum == ""

/*Annual aggregate US inflation measures from Penn World Tables.
Rows that exist only in priceindex.dta (_merge==2) are discarded.*/
merge m:1 year using "`data'priceindex.dta"
drop if _merge==2
drop _merge

/*Returns to Capital
BLS Table 3.1/6.2/4.1  Capital Income in Billions of Current $
r: capital income/(productive capital stock * capital composition)
	- eqkc or stkc: ratio of capital input/productive stock
	- we think of this as the nominal return to capital
	
R: (nominal interest rate - inflation + depreciation) as r+delta
	- we multiply this object by gross inflation because we ultimately want
	the cost shares to be expressed in the same nominal $ as the labor costs;
	since capital is expressed in terms of real 2005 dollars, this measure of
	inflation converts capital expenditures to today's dollars

The depreciation rate is currently set exogenously at 5%; if variables are used, revisit depreciation rate
*/
/*r_e / r_s / r_all: equipment, structures, and combined returns.
inf_c enters as gross inflation, so (inf_c-1) is the net inflation rate;
0.05 is the depreciation rate noted above.*/
gen r_e = eqky/(eqpk*eqkc)
gen r_s = stky/(stpk*stkc)
gen r_all = (eqky+stky)/(eqpk*eqkc+stpk*stkc)
gen R = (r_all-(inf_c-1)+0.05)*inf_c
gen R_e = (r_e-(inf_c-1)+0.05)*inf_c
gen R_s = (r_s-(inf_c-1)+0.05)*inf_c

/*Inputs
Labor Hours: adjust for non-production work using wage share
KE and KS are perpetual-inventory calculations by the US Census
Materials calculated as in Kehrig (2011) appendix*/
gen L = ph*(sw+0.5*ww)/(1.5*ww)
gen KE = ksteq
gen KS = kstst
/*Total capital; when exactly one component is missing, fall back to the
component that is present.*/
gen K = ksteq+kstst
	replace K = ksteq if K==. & kstst==.
	replace K = kstst if K==. & ksteq==.
/*M is deflated by pimat and pien; M_act is the same sum left undeflated*/
gen M = (mib-mie)/pimat+(cp+cw)/pimat + (cf+ee)/pien
gen M_act = (mib-mie)+(cp+cw) + (cf+ee)

/*Drop establishments for which any input or output variable is missing or negative.
The check covers K, L and va, and also removes values equal to zero.*/
local keyvars "K L va"
foreach var of local keyvars {
	drop if `var'<=0
	drop if `var'==.
}

/*Outputs:
Real Output created using the model
Y=kappa*(PY)^((sigma-1)/sigma), where kappa is an industry constant
*/
/*PY2 is nominal value added; the Y2_* variables invert the relation above
for sigma = infinity, 6 and 3 (kappa is not applied here).*/
gen PY2 = va
gen Y2_sigmainf = PY2
gen Y2_sigma6 = (PY2)^(6/(6-1))
gen Y2_sigma3 = (PY2)^(3/(3-1))


compress
save "`data'Manufacturing_1977_2009.dta", replace
*!gzip /FILE PATH GOES HERE/Manufacturing_1977_2009.dta
}
/*************************************************************
SECTION 3: Creating Value Added Shares
**************************************************************/
if `S3' == 1 {
/*Load the combined 1977-2009 file through the data directory local, the
same location Section 1 saves it to. The file is left uncompressed on disk
afterwards (the gzip line below is disabled).*/
!gunzip "`data'CMF_ASM_1977_2009.dta.gz"
use "`data'CMF_ASM_1977_2009.dta", clear
*!gzip /FILE PATH GOES HERE/CMF_ASM_1977_2009.dta

/*Keep only the variables this section needs, and years through 2007*/
keep ar tabbed et wt fk_naics02 year lbdnum va
drop if year>2007

/*Dropped Administrative Records*/
drop if ar == 1

/*Drop observations deemed unfit for publication*/
drop if tabbed == "N"

/*Key identifiers in ASM/CMF:
ET = 0: plant that is part of ASM in CMF year
AR=1: imputed record
TABBED = N: not deemed fit for published tabulations
*/
keep if et == 0

/*Drop if industry is not considered manufacturing in 2002*/
gen manuf = substr(fk_naics02,1,1)
drop if manuf!="3"

/*Drop if lbdnum is missing*/
drop if lbdnum == ""

/*Negative value added is set to missing and negative weights are reset
to 1; the weighted value added is then summed by industry-year. The
industry variable is spelled out in full so the collapse does not depend
on variable abbreviation being enabled.*/
replace va = . if va<0
replace wt = 1 if wt<0
gen temp = va*wt
collapse (sum) temp, by(fk_naics02 year)
rename temp va

/**********************************************************
Code referencing industry codes deleted at Census request
***********************************************************/

/*Second aggregation over the same keys. With the code above removed it
leaves the totals unchanged; presumably the removed code recoded industries
so that rows had to be re-summed (confirm if that code is restored).*/
collapse (sum) va, by(fk_naics02 year)
rename va va_fk

save "`data'VA_Shares_fk.dta", replace
}
